Inés Sentís
Date of execution
Sys.Date()
Normalize data and create embeddings for the each time point fraction
sample <- "41BBneg"
suppressMessages(suppressWarnings({
library(Seurat)
library(tidyverse)
library(grid)
library(gridExtra)
library(ggplot2)
library(scater)
library(scran)
}))
#here::dr_here(show_reason = TRUE)
source(here::here("SCGRES_124_125/sc_analysis/misc/paths.R"))
source(here::here("SCGRES_124_125/sc_analysis/misc/variables.R"))
"{clust}/{plt_dir}" %>%
glue::glue() %>%
here::here() %>%
dir.create(path = .,
showWarnings = FALSE,
recursive = TRUE)
"{clust}/{robj_dir}" %>%
glue::glue() %>%
here::here() %>%
dir.create(path = .,
showWarnings = FALSE,
recursive = TRUE)
set.seed(0)
seurat_obj <- readRDS(here::here(glue::glue("{qc}/{robj_dir}/clean_combined_object_{sample}.rds")))
var_name <- paste0("comp_", sample)
dcomp <- get(var_name)
seurat_obj <- NormalizeData(
seurat_obj,
normalization.method = "LogNormalize",
scale.factor = 1e4
)
sce <- as.SingleCellExperiment(seurat_obj)
sce
class: SingleCellExperiment dim: 25556 11297 metadata(0): assays(2): counts logcounts rownames(25556): AL627309.1 AL627309.5 ... AC004556.3 AC007325.4 rowData names(0): colnames(11297): AAACCTGAGAGTACAT-1 AAACCTGAGATATGCA-1 ... TTTGTCATCGCCGTGA-1 TTTGTCATCTGTTGAG-1 colData names(12): orig.ident nCount_RNA ... doublet_pred ident reducedDimNames(0): mainExpName: RNA altExpNames(0):
gene_var <- modelGeneVar(sce)
tops <- gene_var %>%
as.data.frame() %>%
arrange(desc(total)) %>%
head(n=20)
gene_var %>%
as.data.frame() %>%
ggplot(aes(mean, total)) +
geom_point() +
geom_line(aes(y = tech), colour = "dodgerblue", size = 1) +
labs(x = "Mean of log-expression", y = "Variance of log-expression")+
geom_text(data=tops, aes(mean,total,label=rownames(tops)))
Warning message:
“Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.”
gene_var %>%
as.data.frame() %>%
arrange(desc(total)) %>%
head(n=20)
| mean | total | tech | bio | p.value | FDR | |
|---|---|---|---|---|---|---|
| <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | |
| CCL5 | 3.2226963 | 2.6094585 | 0.2904358 | 2.3190227 | 0.000000e+00 | 0.000000e+00 |
| GNLY | 0.9423085 | 2.5311915 | 0.5135591 | 2.0176325 | 2.568624e-262 | 3.237750e-258 |
| AL138963.4 | 1.7397791 | 1.3368883 | 0.4721406 | 0.8647477 | 9.119723e-59 | 6.966916e-56 |
| LGALS1 | 1.5077867 | 1.2323799 | 0.5041183 | 0.7282616 | 2.435389e-37 | 9.302447e-35 |
| GZMA | 1.7955862 | 1.1964215 | 0.4617751 | 0.7346464 | 7.492485e-45 | 3.935116e-42 |
| SELL | 1.1491739 | 1.1317825 | 0.5282525 | 0.6035300 | 4.322928e-24 | 1.197594e-21 |
| NKG7 | 0.9150150 | 1.1207335 | 0.5096809 | 0.6110527 | 2.478304e-26 | 7.181385e-24 |
| HIST1H1E | 1.3259846 | 1.0256316 | 0.5212714 | 0.5043602 | 8.246346e-18 | 1.718102e-15 |
| IL7R | 1.8839640 | 0.9879611 | 0.4441444 | 0.5438167 | 2.211869e-27 | 6.560142e-25 |
| CD74 | 2.0870637 | 0.9350074 | 0.4050374 | 0.5299700 | 5.442901e-31 | 1.759173e-28 |
| HOPX | 0.9227670 | 0.8812963 | 0.5108211 | 0.3704753 | 8.655446e-11 | 1.069626e-08 |
| CD8B | 0.9514017 | 0.8688200 | 0.5147557 | 0.3540643 | 7.061222e-10 | 7.672992e-08 |
| CTSW | 1.4425171 | 0.8642003 | 0.5109315 | 0.3532688 | 5.799620e-10 | 6.384647e-08 |
| HIST1H4C | 1.2171291 | 0.8462008 | 0.5274776 | 0.3187231 | 5.235735e-08 | 4.313493e-06 |
| ZNF683 | 0.5486858 | 0.8161550 | 0.4056851 | 0.4104700 | 2.656749e-19 | 6.144647e-17 |
| TUBA1B | 0.6221603 | 0.7982242 | 0.4360284 | 0.3621958 | 1.322677e-13 | 2.137481e-11 |
| HIST1H1D | 0.8954463 | 0.7956085 | 0.5066481 | 0.2889603 | 2.584339e-07 | 1.840429e-05 |
| TUBB | 1.1928595 | 0.7947072 | 0.5280085 | 0.2666987 | 4.378871e-06 | 2.363840e-04 |
| CD8A | 0.9704281 | 0.7923661 | 0.5170880 | 0.2752781 | 1.395038e-06 | 8.433793e-05 |
| KLRK1 | 0.9699119 | 0.7823997 | 0.5170276 | 0.2653721 | 3.127099e-06 | 1.767582e-04 |
hvgs <- getTopHVGs(gene_var,fdr.threshold = 0.05)
length(hvgs)
hvgs <- hvgs[!grepl("^TR[AB][VJC]", hvgs)]
length(hvgs)
seurat_obj <- seurat_obj %>%
ScaleData(features=hvgs) %>%
RunPCA(features=hvgs)
Centering and scaling data matrix PC_ 1 Positive: LTB, FTL, IL7R, TXNIP, EEF1B2, CD52, S1PR1, CCR7, RIPOR2, SELL HCST, NOSIP, TCF7, MTRNR2L12, MALAT1, TMSB10, AL138963.4, NELL2, CR1, FCER1G S100B, IL32, IGLV2-14, CTSW, S100A4, CCL5, IFI44L, GZMK, PDE3B, AIRE Negative: UBE2C, TOP2A, ASPM, CDK1, KIFC1, DLGAP5, GTSE1, RRM2, HJURP, CKAP2L CDCA8, CDCA2, HMMR, KNL1, BIRC5, CENPF, MKI67, NUSAP1, KIF23, TPX2 KIF15, PLK1, KIF14, CDCA3, CCNB2, CCNA2, KIF2C, ANLN, CDKN3, NCAPG PC_ 2 Positive: EEF1B2, IL7R, PDE7B, BEX3, SELL, TPM2, TCF7, CCR7, S1PR1, MYL9 CRABP2, ALDH7A1, S100A13, KRT1, TNNT1, PDE3B, PEG3, MDK, H19, MYH8 AC104823.1, PAX3, LINC00271, AL138963.4, S100A1, MT1A, SNCG, STAR, TUBB3, RBMS3 Negative: NKG7, CCL5, GNLY, CST7, CCL4, CD74, GZMB, GZMH, HLA-DRB1, HLA-DRA GZMK, CXCR3, CD8A, HLA-DPA1, ALOX5AP, ZNF683, HOPX, HLA-DQA1, HLA-DPB1, CCL4L2 KLRK1, GZMA, AOAH, OASL, CTSW, CD52, EOMES, HCST, LINC02694, AC243829.4 PC_ 3 Positive: IFITM1, MALAT1, IFITM2, LTB, CD52, SELL, IL7R, RIPOR2, TXNIP, IL32 MTRNR2L12, CD7, S1PR1, CCR7, NOSIP, PDE3B, TCF7, NELL2, LRRN3, CTSW VIM, ASPM, CKAP2L, HCST, CD96, EEF1B2, KIF15, KNL1, DIAPH3, KIF14 Negative: CRABP2, TPM2, S100A13, MYL9, ALDH7A1, TNNT1, PEG3, BEX3, MDK, MYH8 S100A1, MT1A, PAX3, H19, AC104823.1, SNCG, SPARC, MT1M, TUBB3, STAR TCIM, APOC1, LINC00882, COL1A1, TCF21, PALMD, DMKN, FGFBP1, KCNE5, PITX2 PC_ 4 Positive: IFITM2, CTSW, TMSB10, S100A4, CD52, IFITM1, LTB, HOPX, CD7, FTL HCST, NOSIP, S100A6, CCL5, SELL, LGALS1, CD8B, LINC02446, XCL1, KLRC4 CD8A, KLRK1, CCR7, SPINK2, ZNF683, CXCR3, S1PR1, FXYD2, EEF1B2, S100B Negative: PDE7B, MALAT1, GZMH, RBMS3, PDE3B, CCL4, EOMES, LINC00271, LINC02694, CCL4L2 AC243829.4, KRT1, TIGIT, CST7, ZEB2, HPGDS, PPP1R14B, RTKN2, CCL3, PLEK GZMB, MTRNR2L12, AC013652.1, HLA-DQA1, C1orf21, OASL, GZMK, FCRL6, ITGB1, HLA-DPA1 PC_ 5 Positive: GAPDH, IL32, ACTG1, SPINK2, XCL1, CD7, ENO1, DDIT4, CXCR6, IFITM2 KRT1, FTL, RBMS3, LGALS1, IFITM1, PDE7B, KRT86, CD96, XCL2, LDHA HPGDS, ANXA1, FOXP3, CST7, HIST1H1C, RTKN2, HMGN2, HCST, KLRB1, ALOX5AP Negative: RIPOR2, SELL, S1PR1, CCR7, NOSIP, TMSB10, TXNIP, CD8B, GZMK, C1orf21 CR1, NELL2, LYAR, GZMH, HLA-DPB1, AHNAK, IGLV2-14, IL7R, LTB, EOMES MALAT1, HLA-DPA1, FGFBP2, CD8A, HLA-DRA, TCF7, PLEK, CCL4, LINC02446, PPP1R14B
options(repr.plot.width = 8, repr.plot.height = 6)
ElbowPlot(seurat_obj, n=50)
options(repr.plot.width = 14, repr.plot.height = 6)
FeaturePlot(object = seurat_obj, reduction = "pca",
features = c("nCount_RNA","nFeature_RNA"), order=T)
options(repr.plot.width = 16, repr.plot.height = 16, warn=-1,verbose = FALSE)
genes = c("CD3E", "CD3D",
"CD4", "CD8A","CD8B","FOXP3",
"TOP2A", "IL7R","SELL")
FeaturePlot(seurat_obj, reduction = "pca",
feature=genes, order = F, ncol=3)
options(repr.plot.width = 10, repr.plot.height = 6, warn=-1,verbose = FALSE)
VizDimLoadings(seurat_obj, dims = 1:2, reduction = "pca")
seurat_obj <- RunUMAP(
seurat_obj,
dims = dcomp,
reduction = "pca",
reduction.name = "umap",
reduction.key = "UMAP_"
)
15:54:20 UMAP embedding parameters a = 0.9922 b = 1.112 15:54:20 Read 11297 rows and found 15 numeric columns 15:54:20 Using Annoy for neighbor search, n_neighbors = 30 15:54:20 Building Annoy index with metric = cosine, n_trees = 50 0% 10 20 30 40 50 60 70 80 90 100% [----|----|----|----|----|----|----|----|----|----| * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * | 15:54:21 Writing NN index file to temp file /scratch_tmp/33937423/RtmpgIQqMo/file3ee442f067cf1 15:54:21 Searching Annoy index using 1 thread, search_k = 3000 15:54:25 Annoy recall = 100% 15:54:26 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30 15:54:27 Initializing from normalized Laplacian + noise (using irlba) 15:54:27 Commencing optimization for 200 epochs, with 475194 positive edges 15:54:40 Optimization finished
options(repr.plot.width = 8, repr.plot.height = 6, warn=-1,verbose = FALSE)
DimPlot(
seurat_obj,
reduction = "umap",
pt.size = 0.1
) + ggtitle('UMAP') +
theme(plot.title = element_text(hjust = 0.5, size = 12, face = "bold"))
s.genes <- cc.genes$s.genes
g2m.genes <- cc.genes$g2m.genes
seurat_obj <- CellCycleScoring(seurat_obj, s.features = s.genes, g2m.features = g2m.genes, set.ident = TRUE)
cat_vars <-c("Phase")
con_vars <- c("nCount_RNA", "nFeature_RNA", "pct_mt", "percent.ribo",
"doublet_score", "PTPRC", "CD4", "CD8A", "CD8B", "TOP2A")
vars <- c(cat_vars, con_vars)
# compute plots
list_plots <- lapply(vars, function(var){
if (var %in% cat_vars) {
p <- DimPlot(seurat_obj, reduction = "umap", group.by=var)
} else {
p <- FeaturePlot(seurat_obj, reduction = "umap", feature=var, order = TRUE)
}
return(p)
})
options(repr.plot.width = 16, repr.plot.height = 12, warn=-1,verbose = FALSE)
# show plots
cp <- cowplot::plot_grid(plotlist = list_plots,
align = "hv",
axis = "trbl",
ncol = 4,
nrow = 3)
cp
options(repr.plot.width = 15, repr.plot.height = 22, warn=-1,verbose = FALSE)
genes = c("PTPRC","SMARCB1","TOP2A", "MKI67","CD3E", "CD3D",
"CD4", "CD8A","CD8B", "TRDC","TRGV1","TRGV2", "FOXP3")
FeaturePlot(seurat_obj, reduction = "umap",
feature=genes, order = T, ncol=3)
saveRDS(seurat_obj, here::here(glue::glue("{clust}/{robj_dir}/dimred_combined_object_{sample}.rds")))
sessionInfo()
R version 4.1.1 (2021-08-10) Platform: x86_64-conda-linux-gnu (64-bit) Running under: CentOS Linux 7 (Core) Matrix products: default BLAS/LAPACK: /home/groups/singlecell/isentis/conda_envs/ines_r4.1.1c/lib/libopenblasp-r0.3.24.so locale: [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C [3] LC_TIME=es_ES.UTF-8 LC_COLLATE=en_US.UTF-8 [5] LC_MONETARY=es_ES.UTF-8 LC_MESSAGES=en_US.UTF-8 [7] LC_PAPER=es_ES.UTF-8 LC_NAME=C [9] LC_ADDRESS=C LC_TELEPHONE=C [11] LC_MEASUREMENT=es_ES.UTF-8 LC_IDENTIFICATION=C attached base packages: [1] stats4 grid stats graphics grDevices utils datasets [8] methods base other attached packages: [1] scran_1.22.1 scater_1.22.0 [3] scuttle_1.4.0 SingleCellExperiment_1.16.0 [5] SummarizedExperiment_1.24.0 Biobase_2.54.0 [7] GenomicRanges_1.46.1 GenomeInfoDb_1.30.1 [9] IRanges_2.28.0 S4Vectors_0.32.4 [11] BiocGenerics_0.40.0 MatrixGenerics_1.6.0 [13] matrixStats_1.0.0 gridExtra_2.3 [15] lubridate_1.9.3 forcats_1.0.0 [17] stringr_1.5.0 dplyr_1.1.3 [19] purrr_1.0.2 readr_2.1.4 [21] tidyr_1.3.0 tibble_3.2.1 [23] ggplot2_3.4.4 tidyverse_2.0.0 [25] SeuratObject_4.1.4 Seurat_4.0.5 loaded via a namespace (and not attached): [1] uuid_1.1-1 plyr_1.8.9 [3] igraph_1.3.4 repr_1.1.6 [5] lazyeval_0.2.2 sp_2.1-0 [7] splines_4.1.1 BiocParallel_1.28.3 [9] listenv_0.9.0 scattermore_1.2 [11] digest_0.6.33 htmltools_0.5.6.1 [13] viridis_0.6.4 fansi_1.0.5 [15] magrittr_2.0.3 ScaledMatrix_1.2.0 [17] tensor_1.5 cluster_2.1.4 [19] ROCR_1.0-11 limma_3.50.3 [21] tzdb_0.4.0 globals_0.16.2 [23] timechange_0.2.0 spatstat.sparse_3.0-1 [25] colorspace_2.1-0 ggrepel_0.9.4 [27] crayon_1.5.2 RCurl_1.98-1.12 [29] jsonlite_1.8.7 progressr_0.14.0 [31] spatstat.data_3.0-1 survival_3.5-7 [33] zoo_1.8-12 glue_1.6.2 [35] polyclip_1.10-4 gtable_0.3.4 [37] zlibbioc_1.40.0 XVector_0.34.0 [39] leiden_0.4.3 DelayedArray_0.20.0 [41] BiocSingular_1.10.0 future.apply_1.11.0 [43] abind_1.4-5 scales_1.2.1 [45] edgeR_3.36.0 spatstat.random_3.1-5 [47] miniUI_0.1.1.1 Rcpp_1.0.10 [49] viridisLite_0.4.2 xtable_1.8-4 [51] dqrng_0.3.1 reticulate_1.34.0 [53] spatstat.core_2.4-2 rsvd_1.0.5 [55] metapod_1.2.0 htmlwidgets_1.6.2 [57] httr_1.4.7 RColorBrewer_1.1-3 [59] ellipsis_0.3.2 ica_1.0-3 [61] farver_2.1.1 pkgconfig_2.0.3 [63] uwot_0.1.16 deldir_1.0-9 [65] here_1.0.1 locfit_1.5-9.8 [67] utf8_1.2.3 labeling_0.4.3 [69] tidyselect_1.2.0 rlang_1.1.1 [71] reshape2_1.4.4 later_1.3.1 [73] munsell_0.5.0 tools_4.1.1 [75] cli_3.6.1 generics_0.1.3 [77] ggridges_0.5.4 evaluate_0.22 [79] fastmap_1.1.1 goftest_1.2-3 [81] fitdistrplus_1.1-11 RANN_2.6.1 [83] sparseMatrixStats_1.6.0 pbapply_1.7-2 [85] future_1.33.0 nlme_3.1-162 [87] mime_0.12 compiler_4.1.1 [89] beeswarm_0.4.0 plotly_4.10.2 [91] png_0.1-8 spatstat.utils_3.0-3 [93] statmod_1.5.0 stringi_1.7.12 [95] bluster_1.4.0 lattice_0.21-8 [97] IRdisplay_1.1 Matrix_1.6-1.1 [99] vctrs_0.6.4 pillar_1.9.0 [101] lifecycle_1.0.3 spatstat.geom_3.2-5 [103] lmtest_0.9-40 BiocNeighbors_1.12.0 [105] RcppAnnoy_0.0.21 data.table_1.14.8 [107] cowplot_1.1.1 bitops_1.0-7 [109] irlba_2.3.5.1 httpuv_1.6.11 [111] patchwork_1.1.3 R6_2.5.1 [113] promises_1.2.1 KernSmooth_2.23-22 [115] vipor_0.4.7 parallelly_1.36.0 [117] codetools_0.2-19 MASS_7.3-60 [119] rprojroot_2.0.3 withr_2.5.1 [121] sctransform_0.4.0 GenomeInfoDbData_1.2.7 [123] mgcv_1.8-42 parallel_4.1.1 [125] hms_1.1.3 beachmat_2.10.0 [127] rpart_4.1.19 IRkernel_1.3.2.9000 [129] DelayedMatrixStats_1.16.0 Cairo_1.6-2 [131] Rtsne_0.16 pbdZMQ_0.3-10 [133] shiny_1.7.5.1 base64enc_0.1-3 [135] ggbeeswarm_0.7.2